## Print the start time of the run.
print(Sys.time())

## Keep strings as character vectors when data frames are built (this is already
## the default from R 4.0.0 on; the explicit setting covers older versions).
## `FALSE` is spelled out because `F` is an ordinary variable that can be
## reassigned.
options(stringsAsFactors = FALSE)

suppressPackageStartupMessages(library(devtools))
suppressPackageStartupMessages(library(stm))
suppressPackageStartupMessages(library(readr))
suppressPackageStartupMessages(library(plyr))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(parallel))
suppressPackageStartupMessages(library(tm))
suppressPackageStartupMessages(library(Matrix))
suppressPackageStartupMessages(library(glmnet))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(text2vec))
## suppressPackageStartupMessages(library(randomForest))
suppressPackageStartupMessages(library(irr))
suppressPackageStartupMessages(library(caret)); suppressPackageStartupMessages(library(e1071))
suppressPackageStartupMessages(library(ROCR))

## install_github("wilryh/parrot", dependencies=TRUE)
library(parrot)

## ---- Inputs ---------------------------------------------------------------

## load() restores the objects stored in the .RData file into the workspace;
## the one used from here on is `document_scores_election16`.
load(
    file="data/en_tweets_2009-05-09_to_2016-11-08_dimensions_election16_remove_news_all_docs_only_replicate_psrm.RData"
)
docs <- document_scores_election16

## Labeled tweets (every column read as character) and the ID lookup table
## `just_ids_900` restored from the .Rda file.
labeled_tweets <- read.csv("data/fulloutput_900x3_450LR.csv", colClasses="character")
load("data/just_ids_900.Rda")

## Expose tweet_id_NEW as a character `tweet_id` so it can serve as join key,
## then replace the key in the labeled data by the matching tweet_id_ORIG.
just_ids_900$tweet_id <- as.character(just_ids_900$tweet_id_NEW)
labeled_tweets <- left_join(labeled_tweets, just_ids_900, by = "tweet_id")
labeled_tweets$tweet_id <- labeled_tweets$tweet_id_ORIG

## ---- Per-tweet label shares -------------------------------------------------

## For each category: the mean of `category == <name>` over all rows of a
## tweet, i.e. the fraction of that tweet's rows carrying the category.
entertainment <- setNames(
    aggregate(category == "entertainment" ~ tweet_id, data = labeled_tweets, FUN = mean),
    c("tweet_id", "entertainment")
)
politics <- setNames(
    aggregate(category == "politics" ~ tweet_id, data = labeled_tweets, FUN = mean),
    c("tweet_id", "politics")
)
social_justice <- setNames(
    aggregate(category == "social_justice" ~ tweet_id, data = labeled_tweets, FUN = mean),
    c("tweet_id", "social_justice")
)
other <- setNames(
    aggregate(category == "other" ~ tweet_id, data = labeled_tweets, FUN = mean),
    c("tweet_id", "other")
)

## One row per tweet with the four shares side by side.
comb <- reduce(
    list(entertainment, politics, social_justice, other),
    full_join,
    by = "tweet_id"
)

## Attach the shares to the scored documents; the documents use `tweetid` as
## their key, and only documents that have labels are kept.
ordered_docs <- inner_join(docs, rename(comb, tweetid = tweet_id), by = "tweetid")

## Recode `other` as a 0/1 flag: 1 when none of the three substantive
## categories reaches a share of 0.5, or when "other" itself reaches 0.5.
ordered_docs$other <- with(ordered_docs, {
    none_of_the_three <- entertainment < 0.5 & politics < 0.5 & social_justice < 0.5
    as.integer(none_of_the_three | other >= 0.5)
})




## if (party != "overall") {
##     ordered_docs_sub <- subset(ordered_docs, account_category == party)
## } else {
## }

## all_ordered_docs <- list()
## category_dfs <- list()
## Repeated random-split validation of the category classifiers.
##
## Each of the `n_splits` rounds
##   1. marks every document as test or training at random,
##   2. fits one cross-validated binomial glmnet per category on the training
##      documents, using the columns X0..X9 as predictors,
##   3. predicts class probabilities (at lambda.min) for all documents, and
##   4. scores the test documents -- all of them, and separately those whose
##      account_category is LeftTroll / RightTroll -- with kappa, AUC and ICC.
##
## Results end up in category_{kappas,aucs,icc}_df_{overall,LeftTroll,RightTroll}:
## data frames with one row per round and one column per category.
set.seed(987654321)

n_splits <- 100
feature_cols <- paste0("X", 0:9)

## Order in which the models are fitted. cv.glmnet() assigns its
## cross-validation folds at random, so the fitting order decides which random
## numbers each model consumes; it is kept fixed so that results under this
## seed do not change.
fit_order <- c("social_justice", "politics", "entertainment", "other")

## Order of the categories (columns) in the result tables.
categories <- c("entertainment", "politics", "social_justice", "other")
parties <- c("overall", "LeftTroll", "RightTroll")

## The predictors are the same in every round, so build the matrix once.
feature_matrix <- as.matrix(ordered_docs[, feature_cols])

## Pre-allocated storage: for every party a list with one slot per round, each
## slot holding a numeric vector named by category.
split_store <- setNames(
    lapply(parties, function(party) vector("list", n_splits)),
    parties
)
kappa_rows <- split_store
auc_rows <- split_store
icc_rows <- split_store

for (i in seq_len(n_splits)) {

    ordered_docs_sub <- ordered_docs

    ## Random split: each document independently becomes test (TRUE) or
    ## training (FALSE).
    ordered_docs_sub$test <- sample(
        c(TRUE, FALSE),
        size = nrow(ordered_docs_sub),
        replace = TRUE
    )

    ## Fit on the labeled training documents, predict for every document.
    for (category in fit_order) {
        train_rows <- !is.na(ordered_docs_sub[[category]]) & !ordered_docs_sub$test
        fit <- cv.glmnet(
            x = feature_matrix[train_rows, , drop = FALSE],
            y = ordered_docs_sub[[category]][train_rows] >= 0.5,
            family = "binomial"
        )
        ordered_docs_sub[[paste0(category, "_pred")]] <- as.vector(
            predict(fit, newx = feature_matrix, s = "lambda.min", type = "response")
        )
    }

    for (party in parties) {

        ## The evaluation set depends only on the party, so it is built once
        ## per party rather than once per category.
        if (party != "overall") {
            test_docs <- subset(ordered_docs_sub, test & account_category == party)
        } else {
            test_docs <- subset(ordered_docs_sub, test)
        }

        kappas <- setNames(rep(NA_real_, length(categories)), categories)
        aucs <- kappas
        iccs <- kappas

        for (category in categories) {
            observed <- test_docs[[category]]
            predicted <- test_docs[[paste0(category, "_pred")]]

            ## Dichotomise both at 0.5, with "1" as the first factor level.
            predicted_class <- factor(as.integer(predicted >= 0.5), levels = c("1", "0"))
            observed_class <- factor(as.integer(observed >= 0.5), levels = c("1", "0"))

            ## as.integer() on the factor gives 1 for label "1" and 2 for
            ## label "0"; the score passed to ROCR is flipped (1 - p) so that
            ## it points in the same direction as that coding.
            roc_input <- prediction(1 - predicted, as.integer(observed_class))
            aucs[[category]] <- performance(roc_input, "auc")@y.values[[1]]

            ## Agreement between the raw label share and the predicted
            ## probability (both continuous).
            iccs[[category]] <- icc(
                cbind(observed, predicted),
                model = "twoway", type = "agreement"
            )$value

            kappas[[category]] <- confusionMatrix(
                data = predicted_class,
                reference = observed_class
            )$overall[["Kappa"]]
        }

        kappa_rows[[party]][[i]] <- kappas
        auc_rows[[party]][[i]] <- aucs
        icc_rows[[party]][[i]] <- iccs
    }
}

## Stack the per-round vectors into one data frame per party and metric. The
## vectors are named by category, so the resulting columns carry the category
## names for all three metrics.
bind_split_rows <- function(rows) {
    as.data.frame(do.call(rbind, rows))
}

category_kappas_df_overall <- bind_split_rows(kappa_rows[["overall"]])
category_kappas_df_RightTroll <- bind_split_rows(kappa_rows[["RightTroll"]])
category_kappas_df_LeftTroll <- bind_split_rows(kappa_rows[["LeftTroll"]])
category_aucs_df_overall <- bind_split_rows(auc_rows[["overall"]])
category_aucs_df_RightTroll <- bind_split_rows(auc_rows[["RightTroll"]])
category_aucs_df_LeftTroll <- bind_split_rows(auc_rows[["LeftTroll"]])
category_icc_df_overall <- bind_split_rows(icc_rows[["overall"]])
category_icc_df_RightTroll <- bind_split_rows(icc_rows[["RightTroll"]])
category_icc_df_LeftTroll <- bind_split_rows(icc_rows[["LeftTroll"]])



cat("\n#### ####")
cat("\n#### Table A9\n")

## Mean kappa per category across the random splits, rounded to two decimals.
## The columns are named explicitly; unnamed data.frame() arguments would get
## headers derived from the deparsed expressions.
kappa_table <- data.frame(
    LeftTroll = round(colMeans(category_kappas_df_LeftTroll, na.rm = TRUE), 2),
    RightTroll = round(colMeans(category_kappas_df_RightTroll, na.rm = TRUE), 2),
    overall = round(colMeans(category_kappas_df_overall, na.rm = TRUE), 2)
)
print(xtable::xtable(kappa_table))

## Column-wise standard deviation, ignoring missing values.
##
## x: a matrix or an all-numeric data frame (apply() coerces a data frame to a
##    matrix before iterating over its columns).
## Returns a numeric vector with one element per column, named by the column
## names of `x` when it has any.
colSds <- function(x) {
    ## TRUE is spelled out: `T` is a plain variable and can be reassigned.
    apply(x, 2, sd, na.rm = TRUE)
}

## Standard deviation of kappa per category across the random splits.
## Columns are named explicitly, and the table is print()ed like the table of
## means so that it also shows up when the script is run through source().
kappa_table <- data.frame(
    LeftTroll = round(colSds(category_kappas_df_LeftTroll), 2),
    RightTroll = round(colSds(category_kappas_df_RightTroll), 2),
    overall = round(colSds(category_kappas_df_overall), 2)
)
print(xtable::xtable(kappa_table))

cat("\n#### ####")
cat("\n#### Table A11\n")

## Mean AUC per category across the random splits, rounded to two decimals.
## Columns are named explicitly instead of relying on deparsed expressions.
auc_table <- data.frame(
    LeftTroll = round(colMeans(category_aucs_df_LeftTroll, na.rm = TRUE), 2),
    RightTroll = round(colMeans(category_aucs_df_RightTroll, na.rm = TRUE), 2),
    overall = round(colMeans(category_aucs_df_overall, na.rm = TRUE), 2)
)
print(xtable::xtable(auc_table))

## Matching standard deviations; print()ed explicitly so the table also shows
## up when the script is run through source().
auc_table <- data.frame(
    LeftTroll = round(colSds(category_aucs_df_LeftTroll), 2),
    RightTroll = round(colSds(category_aucs_df_RightTroll), 2),
    overall = round(colSds(category_aucs_df_overall), 2)
)
print(xtable::xtable(auc_table))

cat("\n#### ####")
## NOTE(review): this heading repeats the "Table A9" label already printed
## above the kappa table -- confirm the intended table number for the ICC
## results. The string is left as it was.
cat("\n#### Table A9\n")

## Mean ICC per category across the random splits, rounded to two decimals.
## Columns are named explicitly instead of relying on deparsed expressions.
icc_table <- data.frame(
    LeftTroll = round(colMeans(category_icc_df_LeftTroll, na.rm = TRUE), 2),
    RightTroll = round(colMeans(category_icc_df_RightTroll, na.rm = TRUE), 2),
    overall = round(colMeans(category_icc_df_overall, na.rm = TRUE), 2)
)
print(xtable::xtable(icc_table))

## Matching standard deviations; print()ed explicitly so the table also shows
## up when the script is run through source().
icc_table <- data.frame(
    LeftTroll = round(colSds(category_icc_df_LeftTroll), 2),
    RightTroll = round(colSds(category_icc_df_RightTroll), 2),
    overall = round(colSds(category_icc_df_overall), 2)
)
print(xtable::xtable(icc_table))

## Print the end time of the run.
print(Sys.time())
